import json
import tensorflow as tf


def get_data(path="./data/sarcasm.json"):
    """
    Load the headline sentences and their sarcasm labels from a JSON file.

    The file must contain a JSON array of objects, each providing a
    ``headline`` entry (the sentence) and an ``is_sarcastic`` entry (the
    label telling whether the sentence is sarcastic). Any other entries,
    such as ``article_link``, are ignored.

    :param path: location of the JSON dataset; defaults to the path the
        script has always used, so existing ``get_data()`` calls still work.
    :return: a ``(sentences, labels)`` tuple of two lists of equal length,
        in the same order as the records in the file.
    :raises OSError: if the file cannot be opened.
    :raises KeyError: if a record lacks ``headline`` or ``is_sarcastic``.
    """
    # Decode explicitly as UTF-8: without it, open() falls back to the
    # locale's preferred encoding, which breaks non-ASCII headlines on
    # systems whose default is not UTF-8.
    with open(path, encoding="utf-8") as f:
        datastore = json.load(f)

    sentences = [item['headline'] for item in datastore]
    labels = [item['is_sarcastic'] for item in datastore]

    return sentences, labels

if __name__ == '__main__':
    # Script entry point: tokenize the headlines and show the padded result.
    headlines, targets = get_data()

    preprocessing = tf.keras.preprocessing

    # Fit the vocabulary on every headline; "<oov>" is passed as the
    # tokenizer's out-of-vocabulary token.
    text_tokenizer = preprocessing.text.Tokenizer(oov_token="<oov>")
    text_tokenizer.fit_on_texts(headlines)
    vocabulary = text_tokenizer.word_index

    # Turn each headline into a sequence of word ids, then pad the
    # sequences using the 'post' padding mode.
    encoded = text_tokenizer.texts_to_sequences(headlines)
    padded_batch = preprocessing.sequence.pad_sequences(encoded, padding='post')

    # Show the first encoded headline followed by the overall array shape.
    for summary in (padded_batch[0], padded_batch.shape):
        print(summary)
